Completed
Push — master ( 567325...194371 )
by Elbert
01:14
created

w.resolveExcludes   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
c 0
b 0
f 0
nc 1
nop 1
dl 0
loc 3
rs 10
1
/**
2
 * Wappalyzer v4
3
 *
4
 * Created by Elbert Alias <[email protected]>
5
 *
6
 * License: GPLv3 http://www.gnu.org/licenses/gpl-3.0.txt
7
 */
8
9
var wappalyzer = (function() {
10
	//'use strict';
11
12
	/**
	 * Application class
	 *
	 * Tracks the detection state of one application: which patterns matched,
	 * with what confidence, and which version strings were extracted.
	 *
	 * @param {string}  app        Application name
	 * @param {boolean} [detected] Initial detected state (coerced to a boolean)
	 */
	var Application = function(app, detected) {
		this.app             = app;
		this.confidence      = { };
		this.confidenceTotal = 0;
		this.detected        = Boolean(detected);
		this.excludes        = [ ];
		this.version         = '';
		this.versions        = [ ];
	};

	Application.prototype = {
		/**
		 * Calculate confidence total
		 *
		 * Sums every recorded confidence value, caps the sum at 100 and stores
		 * it in confidenceTotal.
		 *
		 * @returns {number} The capped total
		 */
		getConfidence: function() {
			var
				id,
				value,
				total = 0;

			for ( id in this.confidence ) {
				// Coerce before adding: a string value would be concatenated instead of summed
				value = Number(this.confidence[id]);

				if ( !isNaN(value) ) {
					total += value;
				}
			}

			return this.confidenceTotal = Math.min(total, 100);
		},

		/**
		 * Resolve version number (find the longest version number that contains all shorter detected version numbers)
		 *
		 * Sorts this.versions in place by length and stores the result in this.version.
		 *
		 * @returns {string|undefined} The resolved version, or undefined when no version was detected
		 */
		getVersion: function() {
			var i, resolved;

			if ( !this.versions.length ) {
				return;
			}

			this.versions.sort(function(a, b) {
				return a.length - b.length;
			});

			resolved = this.versions[0];

			for ( i = 1; i < this.versions.length; i++ ) {
				// Stop at the first longer version that does not contain the current one
				if ( this.versions[i].indexOf(resolved) === -1 ) {
					break;
				}

				resolved = this.versions[i];
			}

			return this.version = resolved;
		},

		/**
		 * Mark the application as detected by a pattern
		 *
		 * @param {Object} pattern Parsed pattern; reads regex, confidence and version
		 * @param {string} type    Kind of data the pattern matched (e.g. 'url', 'html')
		 * @param {string} value   The text the pattern matched against
		 * @param {string} [key]   Optional sub-key (e.g. a header or meta name)
		 */
		setDetected: function(pattern, type, value, key) {
			var
				matches,
				version,
				confidence = parseFloat(pattern.confidence);

			this.detected = true;

			// Set confidence level; stored as a number so totals add up arithmetically
			this.confidence[type + ' ' + ( key ? key + ' ' : '' ) + pattern.regex] = !pattern.confidence || isNaN(confidence) ? 100 : confidence;

			// Detect version number
			if ( !pattern.version ) {
				return;
			}

			version = pattern.version;
			matches = pattern.regex.exec(value);

			if ( !matches ) {
				return;
			}

			matches.forEach(function(match, i) {
				// Parse ternary operator
				var ternary = new RegExp('\\\\' + i + '\\?([^:]+):(.*)$').exec(version);

				if ( ternary && ternary.length === 3 ) {
					version = version.replace(ternary[0], match ? ternary[1] : ternary[2]);
				}

				// Replace back references. A replacer function is used so that '$'
				// sequences in the captured text are inserted literally
				version = version.replace(new RegExp('\\\\' + i, 'g'), function() {
					return match || '';
				});
			});

			if ( version && this.versions.indexOf(version) < 0 ) {
				this.versions.push(version);
			}

			this.getVersion();
		}
	};
102
103
	/**
	 * Wrap a lone string in an array; any other value is returned unchanged
	 */
	var asArray = function(value) {
		if ( typeof value === 'string' ) {
			return [ value ];
		}

		return value;
	};
106
107
	/**
	 * Call driver functions
	 *
	 * @param {string} func Name of the function to call on w.driver
	 * @param {*}      args Single argument passed through to that function
	 * @returns {*} Whatever the driver function returns, or undefined when it is not implemented
	 */
	var driver = function(func, args) {
		if ( !w.driver || typeof w.driver[func] !== 'function' ) {
			// w.log itself goes through driver('log'), so reporting a missing
			// log function would call back into here without end
			if ( func !== 'log' ) {
				w.log('not implemented: w.driver.' + func, 'warn');
			}

			return;
		}

		return w.driver[func](args);
	};
119
120
	/**
	 * Parse apps.json patterns
	 *
	 * A pattern is a regular expression source optionally followed by
	 * '\;key:value' attributes.
	 *
	 * @param {string|Array|Object} patterns A single pattern, a list of patterns,
	 *                                       or an object mapping keys to either
	 * @returns {Array|Object} An array of parsed patterns for the string/list form,
	 *                         otherwise an object mapping each key to such an array.
	 *                         Each parsed pattern has `string`, `regex` and one
	 *                         property per attribute (attribute values are strings)
	 */
	var parsePatterns = function(patterns) {
		var
			key,
			// Remember the input form up front instead of inferring it from a 'main' key
			// afterwards, which would misread an object that has a key called 'main'
			isList = typeof patterns === 'string' || Array.isArray(patterns),
			parsed = {};

		// Convert string to object containing array containing string
		if ( isList ) {
			patterns = {
				main: asArray(patterns)
			};
		}

		for ( key in patterns ) {
			if ( !Object.prototype.hasOwnProperty.call(patterns, key) ) {
				continue;
			}

			parsed[key] = [];

			asArray(patterns[key]).forEach(function(pattern) {
				var attrs = {};

				pattern.split('\\;').forEach(function(attr, i) {
					if ( i ) {
						// Key value pairs; the value may itself contain ':'
						attr = attr.split(':');

						if ( attr.length > 1 ) {
							attrs[attr.shift()] = attr.join(':');
						}
					} else {
						attrs.string = attr;

						try {
							// The RegExp constructor needs no escaping of '/'
							attrs.regex = new RegExp(attr, 'i');
						} catch (e) {
							// An unparsable pattern falls back to the empty regular expression
							attrs.regex = new RegExp();

							w.log(e + ': ' + attr, 'error');
						}
					}
				});

				parsed[key].push(attrs);
			});
		}

		// Convert back to array if the original pattern list was an array (or string)
		return isList ? parsed.main : parsed;
	};
173
174
	/**
175
	 * Main script
176
	 */
177
	var w = {
178
		apps: {},
179
		cats: null,
180
		ping: {
181
			hostnames: { }
182
		},
183
		adCache: [],
184
		detected: {},
185
186
		config: {
187
			websiteURL: 'https://wappalyzer.com/',
188
			twitterURL: 'https://twitter.com/Wappalyzer',
189
			githubURL: 'https://github.com/AliasIO/Wappalyzer',
190
		},
191
192
		validation: {
193
			hostname: /(www.)?((.+?)\.(([a-z]{2,3}\.)?[a-z]{2,6}))$/,
194
			hostnameBlacklist: /((local|dev(elopment)?|stag(e|ing)?|test(ing)?|demo(shop)?|admin|google|cache)\.|\/admin|\.local)/
195
		},
196
197
		/**
198
		 * Log messages to console
199
		 */
200
		log: function(message, type) {
201
			driver('log', {
202
				message: JSON.stringify(message),
203
				type: type || 'debug'
204
			});
205
		},
206
207
		/**
208
		 * Initialize
209
		 */
210
		init: function() {
211
			w.log('w.init');
212
213
			// Initialize driver
214
			if ( w.driver !== undefined ) {
215
				driver('init');
216
			} else {
217
				w.log('No driver, exiting');
218
			}
219
		},
220
221
		/**
222
		 * Analyze the request
223
		 */
224
		analyze: function(hostname, url, data) {
225
			var
226
				app,
227
				apps = {};
228
229
			w.log('w.analyze');
230
231
			if ( w.apps === undefined || w.categories === undefined ) {
232
				w.log('apps.json not loaded, check for syntax errors');
233
234
				return;
235
			}
236
237
			// Remove hash from URL
238
			data.url = url = url.split('#')[0];
239
240
			if ( typeof data.html !== 'string' ) {
241
				data.html = '';
242
			}
243
244
			if ( w.detected[url] === undefined ) {
245
				w.detected[url] = {};
246
			}
247
248
			for ( app in w.apps ) {
249
				apps[app] = w.detected[url] && w.detected[url][app] ? w.detected[url][app] : new Application(app);
250
251
				if ( url ) {
252
					w.analyzeUrl(apps[app], url);
253
				}
254
255
				if ( data.html ) {
256
					w.analyzeHtml(apps[app], data.html);
257
					w.analyzeScript(apps[app], data.html);
258
					w.analyzeMeta(apps[app], data.html);
259
				}
260
261
				if ( data.headers ) {
262
					w.analyzeHeaders(apps[app], data.headers);
263
				}
264
265
				if ( data.env ) {
266
					w.analyzeEnv(apps[app], data.env);
267
				}
268
			}
269
270
			for ( app in apps ) {
271
				if ( !apps[app].detected ) {
272
					delete apps[app];
273
				}
274
			}
275
276
			w.resolveExcludes(apps);
277
			w.resolveImplies(apps, url);
278
279
			w.cacheDetectedApps(apps, url);
280
			w.trackDetectedApps(apps, url, hostname, data.html);
281
282
			w.log(Object.keys(apps).length + ' apps detected: ' + Object.keys(apps).join(', ') + ' on ' + url);
283
284
			driver('displayApps');
285
		},
286
287
		resolveExcludes: function(apps) {
288
			var
289
				app,
290
				excludes = [];
291
292
			// Exclude app in detected apps only
293
			for ( app in apps ) {
294
				if ( w.apps[app].excludes ) {
295
					asArray(w.apps[app]).excludes.forEach(function(excluded) {
296
						excludes.push(excluded);
297
					});
298
				}
299
			}
300
301
			// Remove excluded applications
302
			for ( app in apps ) {
303
				if ( excludes.indexOf(app) !== -1 ) {
304
					delete apps[app];
305
				}
306
			}
307
		},
308
309
		resolveImplies: function(apps, url) {
310
			var
311
				confidence,
312
				id,
313
				checkImplies = true;
314
315
			// Implied applications
316
			// Run several passes as implied apps may imply other apps
317
			while ( checkImplies ) {
318
				checkImplies = false;
319
320
				for ( app in apps ) {
321
					confidence = apps[app].confidence;
322
323
					if ( w.apps[app] && w.apps[app].implies ) {
324
						asArray(w.apps[app].implies).forEach(function(implied) {
325
							implied = parsePatterns(implied)[0];
326
327
							if ( !w.apps[implied.string] ) {
328
								w.log('Implied application ' + implied.string + ' does not exist', 'warn');
329
330
								return;
331
							}
332
333
							if ( !apps.hasOwnProperty(implied.string) ) {
334
								apps[implied.string] = w.detected[url] && w.detected[url][implied.string] ? w.detected[url][implied.string] : new Application(implied.string, true);
335
336
								checkImplies = true;
337
							}
338
339
							// Apply app confidence to implied app
340
							for ( id in confidence ) {
341
								apps[implied.string].confidence[id + ' implied by ' + app] = confidence[id] * ( implied.confidence ? implied.confidence / 100 : 1 );
342
							}
343
						});
344
					}
345
				}
346
			}
347
		},
348
349
		/**
350
		 * Cache detected applications
351
		 */
352
		cacheDetectedApps: function(apps, url) {
353
			var app, id, confidence;
354
355
			for ( app in apps ) {
356
				confidence = apps[app].confidence;
357
358
				// Per URL
359
				w.detected[url][app] = apps[app];
360
361
				for ( id in confidence ) {
362
					w.detected[url][app].confidence[id] = confidence[id];
363
				}
364
			}
365
		},
366
367
		/**
368
		 * Track detected applications
369
		 */
370
		trackDetectedApps: function(apps, url, hostname, html) {
371
			var app, match;
372
373
			for ( app in apps ) {
374
				if ( w.detected[url][app].getConfidence() >= 100 && w.validation.hostname.test(hostname) && !w.validation.hostnameBlacklist.test(url) ) {
375
					if ( !w.ping.hostnames.hasOwnProperty(hostname) ) {
376
						w.ping.hostnames[hostname] = {
377
							applications: {},
378
							meta: {}
379
						};
380
					}
381
382
					if ( !w.ping.hostnames[hostname].applications.hasOwnProperty(app) ) {
383
						w.ping.hostnames[hostname].applications[app] = {
384
							hits: 0
385
						};
386
					}
387
388
					w.ping.hostnames[hostname].applications[app].hits ++;
389
390
					if ( apps[app].version ) {
391
						w.ping.hostnames[hostname].applications[app].version = apps[app].version;
392
					}
393
				}
394
			}
395
396
			// Additional information
397
			if ( w.ping.hostnames.hasOwnProperty(hostname) ) {
398
				match = html.match(/<html[^>]*[: ]lang="([a-z]{2}((-|_)[A-Z]{2})?)"/i);
399
400
				if ( match && match.length ) {
401
					w.ping.hostnames[hostname].meta['language'] = match[1];
402
				}
403
			}
404
405
			if ( Object.keys(w.ping.hostnames).length >= 50 || w.adCache.length >= 50 ) {
406
				driver('ping');
407
			}
408
		},
409
410
		/**
411
		 * Analyze URL
412
		 */
413
		analyzeUrl: function(app, url) {
414
			var patterns = parsePatterns(w.apps[app.app].url);
415
416
			if ( patterns.length ) {
417
				patterns.forEach(function(pattern) {
418
					if ( pattern.regex.test(url) ) {
419
						app.setDetected(pattern, 'url', url);
420
					}
421
				});
422
			}
423
		},
424
425
		/**
426
		 * Analyze HTML
427
		 */
428
		analyzeHtml: function(app, html) {
429
			var patterns = parsePatterns(w.apps[app.app].html);
430
431
			if ( patterns.length ) {
432
				patterns.forEach(function(pattern) {
433
					if ( pattern.regex.test(html) ) {
434
						app.setDetected(pattern, 'html', html);
435
					}
436
				});
437
			}
438
		},
439
440
		/**
441
		 * Analyze script tag
442
		 */
443
		analyzeScript: function(app, html) {
444
			var
445
				regex = new RegExp('<script[^>]+src=("|\')([^"\']+)', 'ig'),
446
				patterns = parsePatterns(w.apps[app.app].script);
447
448
			if ( patterns.length ) {
449
				patterns.forEach(function(pattern) {
450
					var match;
451
452
					while ( match = regex.exec(html) ) {
453
						if ( pattern.regex.test(match[2]) ) {
454
							app.setDetected(pattern, 'script', match[2]);
455
						}
456
					}
457
				});
458
			}
459
		},
460
461
		/**
462
		 * Analyze meta tag
463
		 */
464
		analyzeMeta: function(app, html) {
465
			var
466
				content, match, meta,
467
				regex = /<meta[^>]+>/ig,
468
				patterns = parsePatterns(w.apps[app.app].meta);
469
470
			if ( patterns.length ) {
471
				while ( match = regex.exec(html) ) {
472
					for ( meta in patterns ) {
473
						if ( new RegExp('(name|property)=["\']' + meta + '["\']', 'i').test(match) ) {
474
							content = match.toString().match(/content=("|')([^"']+)("|')/i);
475
476
							patterns[meta].forEach(function(pattern) {
477
								if ( content && content.length === 4 && pattern.regex.test(content[2]) ) {
478
									app.setDetected(pattern, 'meta', content[2], meta);
479
								}
480
							});
481
						}
482
					}
483
				}
484
			}
485
		},
486
487
		/**
488
		 * analyze response headers
489
		 */
490
		analyzeHeaders: function(app, headers) {
491
			var
492
				header,
493
				patterns = parsePatterns(w.apps[app.app].headers);
494
495
			if ( headers ) {
496
				for ( header in patterns ) {
497
					patterns[header].forEach(function(pattern) {
498
						header = header.toLowerCase();
499
500
						if ( headers.hasOwnProperty(header) && pattern.regex.test(headers[header]) ) {
501
							app.setDetected(pattern, 'headers', headers[header], header);
502
						}
503
					});
504
				}
505
			}
506
		},
507
508
		/**
509
		 * Analyze environment variables
510
		 */
511
		analyzeEnv: function(app, envs) {
512
			var patterns = parsePatterns(w.apps[app.app].env);
513
514
			if ( patterns.length ) {
515
				patterns.forEach(function(pattern) {
516
					var env;
517
518
					for ( env in envs ) {
519
						if ( pattern.regex.test(envs[env]) ) {
520
							app.setDetected(pattern, 'env', envs[env]);
521
						}
522
					}
523
				});
524
			}
525
		}
526
	};
527
528
	return w;
529
})();
530
531
// CommonJS package: expose the module when the loader provides an `exports` object
// See http://wiki.commonjs.org/wiki/CommonJS
if ( 'object' === typeof exports ) {
	exports['wappalyzer'] = wappalyzer;
}
536